# 爬取5A信息
# 更新省字段到景点表

# https://touch.piao.qunar.com/touch/detail_2283.html?from=t_yzzr_poi&cat=from_area=t_yzzr_poi&dist_city=阿坝藏族羌族自治州

import os
import re
import sqlite3
from lxml import etree
import requests

# Locate the SQLite file relative to this script: <script dir>/../mysite/db.sqlite3
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DB_PATH = os.path.join(BASE_DIR, os.pardir, "mysite", "db.sqlite3")

# One connection and one cursor are shared by the rest of the script.
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()

# Load every row of app_travelinfo up front; the script later reads columns by position.
rows = cursor.execute("SELECT * FROM app_travelinfo;").fetchall()

# Request headers sent with every page fetch (desktop Chrome User-Agent).
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

# Grade marker embedded in the page title, e.g. "(AAAAA景区)"; group 1 is the run of A's.
LEVEL_PATTERN = re.compile(r'\((A+)景区\)')
# Seconds to wait on a single page; requests waits indefinitely when no timeout is given.
REQUEST_TIMEOUT = 10


def _extract_level(html_text):
    """Return the A-grade string found in a detail page, or "" when absent.

    The page is parsed as HTML, the first text node of the
    ``mp-headfeagure-title`` div is printed and searched for the grade marker.

    Args:
        html_text: Body of the fetched detail page.

    Returns:
        The matched run of "A" characters (e.g. "AAAAA"), or "" if the page
        cannot be parsed, has no title node, or the title carries no grade.
    """
    try:
        root = etree.HTML(html_text)
    except (etree.LxmlError, ValueError):
        # Unparseable page: treated the same as a page without a grade.
        return ""
    if root is None:
        # lxml yields None when there is no document to build a tree from.
        return ""

    titles = root.xpath('//div[@class="mp-headfeagure-title"]/text()')
    if not titles:
        return ""

    title = titles[0]
    print(title)
    match = LEVEL_PATTERN.search(title)
    return match.group(1) if match else ""


# For every travel row: look up its province, scrape its grade, write both back.
try:
    for travel_row in rows:
        # Positional columns of app_travelinfo as returned by SELECT *.
        travel_id = travel_row[0]
        dist_city = travel_row[1]
        sight_id = travel_row[4]

        url = "https://touch.piao.qunar.com/touch/detail_{}.html?from=t_yzzr_poi&cat=from_area=t_yzzr_poi&dist_city={}".format(
            sight_id, dist_city
        )

        cursor.execute("SELECT province FROM app_city WHERE city = ?;", (dist_city,))
        city_rows = cursor.fetchall()
        # No match leaves province empty; if several rows match, the last one is used.
        province = city_rows[-1][0] if city_rows else ""

        try:
            response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Leave this row untouched so a later run can retry it, and keep going
            # instead of aborting the whole job on one bad connection.
            print("Skipping sight {}: request failed ({})".format(sight_id, exc))
            continue

        level = _extract_level(response.text)

        cursor.execute(
            "UPDATE app_travelinfo SET level = ?, province = ? WHERE id = ? ",
            (level, province, travel_id),
        )
        # Commit per row so progress survives an interruption part-way through.
        conn.commit()
finally:
    # Always release the database handle, even if the loop raised.
    conn.close()
